################################################################################ 
#
# Counterspeech encouraging users to adopt the perspective of minority 
# groups reduces hate speech and its amplification on social media
#
# Tab C5 - Balance of covariates  
#
################################################################################ 


# NOTE(review): this wipes every object in the calling workspace when the
# script is sourced; running the script in a fresh R session is safer.
rm(list = ls())

# Set to TRUE to write the balance table to disk at the end of the script
# (spelled out as TRUE because `T` is an ordinary, reassignable variable)
setsave = TRUE

################################################################################ 
#  LIBRARIES
################################################################################ 

library(dplyr)
library(estimatr)
library(xtable)


################################################################################ 
#   DATA AND FOLDER
################################################################################ 

# Project root (fill in before running); results and data live beneath it
wd = ''
wd_res = file.path(wd, 'results')
wd_data = file.path(wd, 'data')

# Read the main analysis dataset from the data folder
data = read.csv(file.path(wd_data, 'dataset_analysis.csv'))

################################################################################ 
#   VARIABLES
################################################################################

# Covariates checked for balance: dataset column names ...
controls = c(
  "account_age",
  "followers_count",
  "pre_treat_tweet_length",
  "total_tweets_pre",
  "tox_num_pre",
  "friends_count"
)
# ... and their display labels, in the same order as `controls`
controls_names = c(
  "Account Age",
  "Followers Count",
  "Avg Length of Tweets",
  "No of Tweets",
  "No of Xenophobic Tweets",
  "Friends Count"
)

# Treatment indicator columns in the dataset ...
treatments = c(
  "empathy",
  "traditional",
  "analogical",
  "perspective_getting",
  "disapproval"
)
# ... and their display labels, in the same order as `treatments`
treatments_lab = c(
  "Empathy",
  "Traditional Perspective Taking",
  "Analogical Perspective Taking",
  "Perspective Getting",
  "Disapproval"
)


################################################################################ 
#   Scale variables
################################################################################ 

# Standardise every covariate (mean 0, sd 1).
# Non-finite entries (NA, NaN, +/-Inf) are set to NA *before* scaling: a single
# infinite value would otherwise make the column mean and scale infinite and
# turn the whole scaled column into NaN, so filtering for Inf afterwards would
# come too late. The result is stored as a plain numeric vector rather than the
# one-column matrix returned by scale().
data[controls] = lapply(data[controls], function(x){
  x[!is.finite(x)] = NA
  as.numeric(scale(x))
})

################################################################################ 
#   Run the models on the final sample, excluding attrited accounts
################################################################################ 

# One model per (covariate, treatment) pair, stored covariate-major:
# all treatments for controls[1], then all treatments for controls[2], ...
n_treat = length(treatments)
MODELS = vector('list', length(controls) * n_treat)

# Rows of the comparison group; identical for every regression.
# NOTE(review): group_number == 5 is taken to be the control arm, as the
# original comment states -- confirm against the codebook.
control_rows = which(data$group_number == 5)

# Running position in MODELS
n = 1

for (j in seq_along(controls)){
  
  # drop missings: rows without a usable (finite) value for this covariate;
  # depends on the covariate only, so it is computed once per covariate
  droper = which(!is.finite(data[, controls[j]]))
  
  for (i in seq_along(treatments)){
    
    # keep the right treatment and control group
    keeper = union(which(data[, treatments[i]] == 1), control_rows)
    keeper = setdiff(keeper, droper)
    
    # select rows: standardised covariate (outcome) and treatment indicator
    y = data[keeper, controls[j]]
    d = data[keeper, treatments[i]]
    
    # regression of the covariate on the treatment dummy, HC3 robust s.e.
    MODELS[[n]] = lm_robust(y ~ d, se_type='HC3')
    
    print('Done with: ')
    print(n)
    
    n = n+1
    
  }
}




################################################################################ 
#   Save results in a container
################################################################################ 


# Collect the estimate, standard error and p-value of the second coefficient
# of every fitted model into one data frame.
#
# Arguments:
#   list_to_be_transformed  list of fitted models whose summary() has a
#                           `coefficients` matrix (row 2 is read; for models
#                           fitted as `y ~ d` that is the coefficient on d).
#                           Expected order: all treatments for the first
#                           outcome, then all treatments for the second, ...
#   outvars                 outcome variable names
#   outvars_name            display labels, parallel to `outvars`
#   treat                   treatment variable names; defaults to the global
#                           `treatments`
#   treat_lab               display labels, parallel to `treat`; defaults to
#                           the global `treatments_lab`
#
# Returns a data frame with one row per model and the columns
# yvar, yvar_name, treat, treatments_lab, coef, se, pval.
make_container = function(list_to_be_transformed, outvars, outvars_name,
                          treat = treatments, treat_lab = treatments_lab){
  MOD = list_to_be_transformed
  n_treat = length(treat)
  
  # Fail early instead of letting data.frame() recycle or misalign labels
  stopifnot(length(outvars_name) == length(outvars),
            length(treat_lab) == n_treat,
            length(MOD) == length(outvars) * n_treat)
  
  # Summarise each model once and reuse its coefficient table
  coef_tabs = lapply(MOD, function(m) summary(m)$coefficients)
  
  # Row 2, columns 1 / 2 / 4 of the coefficient table
  coef = vapply(coef_tabs, function(tab) tab[2, 1], numeric(1))
  se = vapply(coef_tabs, function(tab) tab[2, 2], numeric(1))
  pval = vapply(coef_tabs, function(tab) tab[2, 4], numeric(1))
  
  # Each outcome is repeated once per treatment; the treatment vector is
  # cycled once per outcome
  container = cbind.data.frame(yvar = rep(outvars, each = n_treat),
                               yvar_name = rep(outvars_name, each = n_treat),
                               treat = rep(treat, times = length(outvars)),
                               treatments_lab = rep(treat_lab, times = length(outvars)),
                               coef = coef, se = se, pval = pval)
  container
}


# One row per (covariate, treatment) regression
container = make_container(MODELS, controls, controls_names)

# Keep only the display labels; the raw variable names are not exported
container$yvar = NULL
container$treat = NULL
# Rows for the Empathy arm are left out of the saved table
container = container[container$treatments_lab != 'Empathy',]
rownames(container) = NULL


if (setsave){
  # Create the output folder on first run so the save does not fail
  out_dir = file.path(wd_res, 'tables')
  if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)
  write.csv(container, file.path(out_dir, 'balance_full_sample.csv'), row.names = FALSE)
}
